df <- read_csv("./data/combinedstatshot.csv")
## Rows: 165 Columns: 36
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (12): player, dunk_tot, dunk_pct, rim_tot, rim_pct, rim_asted, other2pt_...
## dbl (24): games, games_started, mp_per_g, fg_per_g, fga_per_g, fg_pct, fg2_p...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df <- df |> select(!c("games_started", "pf_per_g"))
colnames(df)
## [1] "player" "dunk_tot" "dunk_pct" "rim_tot"
## [5] "rim_pct" "rim_asted" "other2pt_tot" "other2pt_pct"
## [9] "other2pt_asted" "3pt_tot" "3pt_pct" "3pt_asted"
## [13] "games" "mp_per_g" "fg_per_g" "fga_per_g"
## [17] "fg_pct" "fg2_per_g" "fg2a_per_g" "fg2_pct"
## [21] "fg3_per_g" "fg3a_per_g" "fg3_pct" "ft_per_g"
## [25] "fta_per_g" "ft_pct" "orb_per_g" "drb_per_g"
## [29] "trb_per_g" "ast_per_g" "stl_per_g" "blk_per_g"
## [33] "tov_per_g" "pts_per_g"
path <- "~/BruinSports/data/draftdata.csv"
df_career_stats <- read_csv(path)
## Rows: 960 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): team_id, player, college_name, skip
## dbl (19): pick_overall, seasons, g, mp, pts, trb, ast, fg_pct, fg3_pct, ft_p...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
drop_cols <- c('team_id', 'skip', 'mp', 'pts', 'trb', 'ast')
df_career_stats <- df_career_stats |> select(!drop_cols)
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(drop_cols)
##
## # Now:
## data %>% select(all_of(drop_cols))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
df_lot_picks <- df_career_stats |> filter(pick_overall < 15)
colnames(df_career_stats)
## [1] "pick_overall" "player" "college_name" "seasons" "g"
## [6] "fg_pct" "fg3_pct" "ft_pct" "mp_per_g" "pts_per_g"
## [11] "trb_per_g" "ast_per_g" "ws" "ws_per_48" "bpm"
## [16] "vorp" "year"
# plotting the distribution of points + rebounds + asts (PRA) per game
df_lot_picks <- df_lot_picks |> mutate(
pra_per_g = pts_per_g + trb_per_g + ast_per_g,
pick_overall = factor(pick_overall)
)
# Group by draft position, and getting the mean
draft_means <- df_lot_picks |> group_by(pick_overall) |>
summarize(avg_mpg = mean(mp_per_g),
avg_ppg = mean(pts_per_g),
avg_trbpg = mean(trb_per_g),
avg_apg = mean(ast_per_g),
avg_prapg = mean(pra_per_g))
print(draft_means, n = 14)
## # A tibble: 14 × 6
## pick_overall avg_mpg avg_ppg avg_trbpg avg_apg avg_prapg
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 31.2 18.8 6.52 4.32 29.7
## 2 2 26.9 13.7 4.96 2.88 21.6
## 3 3 30.0 17.6 6.3 3.52 27.4
## 4 4 27.5 12.8 5.39 2.48 20.7
## 5 5 26.0 12.7 4.92 3.55 21.2
## 6 6 23.2 10.2 4.35 2.26 16.8
## 7 7 27.6 13.4 4.88 2.77 21.0
## 8 8 21.1 8.82 3.37 1.77 14.0
## 9 9 24.5 10.9 4.55 2.52 18.0
## 10 10 23.5 10.4 3.61 2.31 16.3
## 11 11 21.1 10.2 3.69 2.16 16.0
## 12 12 24.6 10.6 4.47 2.23 17.3
## 13 13 22.6 10.9 3.99 2.23 17.1
## 14 14 20.3 8.78 3.75 1.3 13.8
df_lot_picks |> ggplot(aes(x = pick_overall, y = pra_per_g)) +
geom_boxplot() +
labs(x = "Draft Pick", y = "Points-Rebounds-Assists Per Game")
df2 <- left_join(df, df_lot_picks, by = c("player"), suffix = c("_college", "_nba"))
df2 <- df2 |>
separate_wider_delim(dunk_tot, delim = "-", names = c("dunk_made", "dunk_attempts"))
df2 <- df2 |>
separate_wider_delim(rim_tot, delim = "-", names = c("rim_made", "rim_attempts"))
df2 <- df2 |>
separate_wider_delim(other2pt_tot, delim = "-", names = c("other2pt_made", "other2pt_attempts"))
df2 <-df2 |>
mutate(across(c(dunk_made, dunk_attempts, rim_made, rim_attempts, other2pt_made, other2pt_attempts), as.numeric))
df2 <- df2 |> mutate(
vorp_per_g = vorp / g
)
df_top_players <- df2 |>
group_by(pick_overall) |>
summarize(across(c(pts_per_g_nba, trb_per_g_nba, ast_per_g_nba, pra_per_g, vorp_per_g), function(x) quantile(x, probs = 0.7)))
df_bottom_players <- df2 |>
group_by(pick_overall) |>
summarize(across(c(pts_per_g_nba, trb_per_g_nba, ast_per_g_nba, pra_per_g, vorp_per_g), function(x) quantile(x, probs = 0.3)))
# metric favors big men
# make the rebound percentile higher
is_not_bust <- function(pick_number, df_top_players = df_top_players, df2 = df2) {
ppg <- df_top_players |> pull(pts_per_g_nba)
apg <- df_top_players |> pull(ast_per_g_nba)
prapg <- df_top_players |> pull(pra_per_g)
df_top_rb <- df2 |>
group_by(pick_overall) |>
summarize(trb_per_g = quantile(trb_per_g_nba, probs = 0.8))
rpg <- df_top_rb |> pull(trb_per_g)
df_top_vorp <- df2 |>
group_by(pick_overall) |>
summarize(vorp_per_g = median(vorp_per_g))
vorppg <- df_top_vorp |> pull(vorp_per_g)
df <- df2 |> filter(pick_overall == pick_number) |>
filter(pra_per_g >= prapg[[pick_number]] | ast_per_g_nba >= apg[[pick_number]] | trb_per_g_nba >= rpg[[pick_number]] | pts_per_g_nba >= ppg[[pick_number]]) |>
filter(vorp_per_g >= vorppg[[pick_number]])
# must also have played at least most of their career in the nba
df <- df |> filter(seasons >= 4 / 5 * (2023-year))
return(df)
}
is_bust <- function(pick_number, df_bottom_players = df_bottom_players, df2 = df2) {
ppg <- df_bottom_players |> pull(pts_per_g_nba)
apg <- df_bottom_players |> pull(ast_per_g_nba)
prapg <- df_bottom_players |> pull(pra_per_g)
df_bottom_rb <- df2 |>
group_by(pick_overall) |>
summarize(trb_per_g = quantile(trb_per_g_nba, probs = 0.4))
rpg <- df_bottom_rb |> pull(trb_per_g)
df_bottom_vorp <- df2 |>
group_by(pick_overall) |>
summarize(vorp_per_g = quantile(vorp_per_g, probs = 0.3))
vorppg <- df_bottom_vorp |> pull(vorp_per_g)
# playing less than half the seasons since drafted makes you a bust
df <- df2 |> filter(pick_overall == pick_number) |>
filter((pra_per_g < prapg[[pick_number]] & ast_per_g_nba < apg[[pick_number]] & trb_per_g_nba < rpg[[pick_number]] & pts_per_g_nba < ppg[[pick_number]] & vorp_per_g < vorppg[[pick_number]]) | seasons < 1 / 2 * (2023-year))
return(df)
}
# first overall picks (not bust)
df_pick_1 <- is_not_bust(1, df_top_players, df2)
# first overall picks (bust)
df_pick_1_bust <- is_bust(1, df_bottom_players, df2)
df_pick_1
## # A tibble: 7 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 John Wall 33 36 91.7% 116 182 63.7%
## 2 Kyrie Irving 0 0 0% 26 39 66.7%
## 3 Anthony Davis 96 98 98.0% 152 174 87.4%
## 4 Karl-Anthony T… 22 24 91.7% 87 121 71.9%
## 5 Ben Simmons 56 61 91.8% 159 220 72.3%
## 6 Zion Williamson 72 79 91.1% 247 313 78.9%
## 7 Anthony Edwards 27 27 100.0% 89 129 69.0%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_1_bust
## # A tibble: 1 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Anthony Bennett 53 58 91.4% 100 140 71.4%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# second overall
df_pick_2 <- is_not_bust(2, df_top_players, df2)
df_pick_2_bust <- is_bust(2, df_bottom_players, df2)
df_pick_2
## # A tibble: 5 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 D'Angelo Russe… 4 4 100.0% 70 110 63.6%
## 2 Brandon Ingram 17 17 100.0% 69 117 59.0%
## 3 Lonzo Ball 37 40 92.5% 94 120 78.3%
## 4 Ja Morant 28 31 90.3% 160 264 60.6%
## 5 Chet Holmgren 57 57 100.0% 105 125 84.0%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_2_bust
## # A tibble: 1 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Derrick Willia… 56 60 93.3% 135 188 71.8%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# third overall
df_pick_3 <- is_not_bust(3, df_top_players, df2)
df_pick_3_bust <- is_bust(3, df_bottom_players, df2)
df_pick_3
## # A tibble: 4 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Bradley Beal 18 20 90.0% 89 137 65.0%
## 2 Joel Embiid 30 30 100.0% 80 99 80.8%
## 3 Jayson Tatum 18 21 85.7% 79 126 62.7%
## 4 Evan Mobley 63 66 95.5% 113 144 78.5%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_3_bust
## # A tibble: 1 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Jahlil Okafor 64 67 95.5% 213 270 78.9%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# fourth overall
df_pick_4 <- is_not_bust(4, df_top_players, df2)
df_pick_4_bust <- is_bust(4, df_bottom_players, df2)
df_pick_4
## # A tibble: 4 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Aaron Gordon 54 56 96.4% 137 198 69.2%
## 2 Jaren Jackson … 31 31 100.0% 61 93 65.6%
## 3 Scottie Barnes 19 21 90.5% 61 89 68.5%
## 4 Keegan Murray 63 67 94.0% 196 277 70.8%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_4_bust
## # A tibble: 0 × 55
## # ℹ 55 variables: player <chr>, dunk_made <dbl>, dunk_attempts <dbl>,
## # dunk_pct <chr>, rim_made <dbl>, rim_attempts <dbl>, rim_pct <chr>,
## # rim_asted <chr>, other2pt_made <dbl>, other2pt_attempts <dbl>,
## # other2pt_pct <chr>, other2pt_asted <chr>, 3pt_tot <chr>, 3pt_pct <chr>,
## # 3pt_asted <chr>, games <dbl>, mp_per_g_college <dbl>, fg_per_g <dbl>,
## # fga_per_g <dbl>, fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>,
## # fg2_pct <dbl>, fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, …
# fifth overall
df_pick_5 <- is_not_bust(5, df_top_players, df2)
df_pick_5_bust <- is_bust(5, df_bottom_players, df2)
df_pick_5
## # A tibble: 3 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 DeMarcus Cousi… 53 57 93.0% 144 189 76.2%
## 2 De'Aaron Fox 20 21 95.2% 131 203 64.5%
## 3 Trae Young 0 0 0% 105 201 52.2%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_5_bust
## # A tibble: 1 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Thomas Robinson 70 83 84.3% 169 262 64.5%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# sixth overall
df_pick_6 <- is_not_bust(6, df_top_players, df2)
df_pick_6_bust <- is_bust(6, df_bottom_players, df2)
df_pick_6
## # A tibble: 5 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Damian Lillard 13 17 76.5% 98 169 58.0%
## 2 Nerlens Noel 48 50 96.0% 76 99 76.8%
## 3 Marcus Smart 16 18 88.9% 78 110 70.9%
## 4 Buddy Hield 18 22 81.8% 119 178 66.9%
## 5 Onyeka Okongwu 58 61 95.1% 135 186 72.6%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_6_bust
## # A tibble: 1 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Ekpe Udoh 30 32 93.8% 78 109 71.6%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# seventh overall
df_pick_7 <- is_not_bust(7, df_top_players, df2)
df_pick_7_bust <- is_bust(7, df_bottom_players, df2)
df_pick_7
## # A tibble: 3 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Julius Randle 37 40 92.5% 132 197 67.0%
## 2 Jamal Murray 18 19 94.7% 77 111 69.4%
## 3 Lauri Markkanen 20 24 83.3% 65 100 65.0%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_7_bust
## # A tibble: 1 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Ben McLemore 44 45 97.8% 90 126 71.4%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# eighth overall
df_pick_8 <- is_not_bust(8, df_top_players, df2)
df_pick_8_bust <- is_bust(8, df_bottom_players, df2)
df_pick_8
## # A tibble: 3 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Al-Farouq Aminu 46 48 95.8% 112 173 64.7%
## 2 Kentavious Cal… 15 16 93.8% 63 94 67.0%
## 3 Franz Wagner 11 11 100.0% 63 93 67.7%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_8_bust
## # A tibble: 0 × 55
## # ℹ 55 variables: player <chr>, dunk_made <dbl>, dunk_attempts <dbl>,
## # dunk_pct <chr>, rim_made <dbl>, rim_attempts <dbl>, rim_pct <chr>,
## # rim_asted <chr>, other2pt_made <dbl>, other2pt_attempts <dbl>,
## # other2pt_pct <chr>, other2pt_asted <chr>, 3pt_tot <chr>, 3pt_pct <chr>,
## # 3pt_asted <chr>, games <dbl>, mp_per_g_college <dbl>, fg_per_g <dbl>,
## # fga_per_g <dbl>, fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>,
## # fg2_pct <dbl>, fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, …
# ninth overall
df_pick_9 <- is_not_bust(9, df_top_players, df2)
df_pick_9_bust <- is_bust(9, df_bottom_players, df2)
df_pick_9
## # A tibble: 5 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Gordon Hayward 19 20 95.0% 89 128 69.5%
## 2 Kemba Walker 3 3 100.0% 115 196 58.7%
## 3 Andre Drummond 80 89 89.9% 130 185 70.3%
## 4 Trey Burke 9 9 100.0% 67 105 63.8%
## 5 Jakob Poeltl 32 34 94.1% 199 284 70.1%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_9_bust
## # A tibble: 1 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Kevin Knox 18 20 90.0% 65 99 65.7%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# tenth overall
df_pick_10 <- is_not_bust(10, df_top_players, df2)
df_pick_10_bust <- is_bust(10, df_bottom_players, df2)
df_pick_10
## # A tibble: 5 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Paul George 18 22 81.8% 70 106 66.0%
## 2 CJ McCollum 3 3 100.0% 34 63 54.0%
## 3 Elfrid Payton 21 24 87.5% 169 247 68.4%
## 4 Mikal Bridges 35 42 83.3% 109 161 67.7%
## 5 Jalen Smith 49 52 94.2% 114 158 72.2%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_10_bust
## # A tibble: 2 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Ziaire Williams 10 11 90.9% 26 49 53.1%
## 2 Johnny Davis 16 19 84.2% 89 143 62.2%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# eleventh overall
df_pick_11 <- is_not_bust(11, df_top_players, df2)
df_pick_11_bust <- is_bust(11, df_bottom_players, df2)
df_pick_11
## # A tibble: 4 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Klay Thompson 8 8 100.0% 66 110 60.0%
## 2 Myles Turner 11 13 84.6% 40 54 74.1%
## 3 Domantas Sabon… 22 24 91.7% 157 214 73.4%
## 4 Shai Gilgeous-… 11 11 100.0% 108 182 59.3%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_11_bust
## # A tibble: 2 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 James Bouknight 12 12 100.0% 52 79 65.8%
## 2 Jett Howard 6 6 100.0% 29 47 61.7%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# twelveth overall
df_pick_12 <- is_not_bust(12, df_top_players, df2)
df_pick_12_bust <- is_bust(12, df_bottom_players, df2)
df_pick_12
## # A tibble: 5 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Steven Adams 29 33 87.9% 85 129 65.9%
## 2 Miles Bridges 30 35 85.7% 84 128 65.6%
## 3 Tyrese Halibur… 7 8 87.5% 46 62 74.2%
## 4 Jalen Williams 25 27 92.6% 124 186 66.7%
## 5 Dereck Lively … 54 55 98.2% 74 96 77.1%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_12_bust
## # A tibble: 1 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Xavier Henry 17 17 100.0% 60 90 66.7%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# thirteenth overall
df_pick_13 <- is_not_bust(13, df_top_players, df2)
df_pick_13_bust <- is_bust(13, df_bottom_players, df2)
df_pick_13
## # A tibble: 7 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Ed Davis 26 27 96.3% 42 50 84.0%
## 2 Kelly Olynyk 25 28 89.3% 152 212 71.7%
## 3 Zach LaVine 21 25 84.0% 51 90 56.7%
## 4 Devin Booker 8 9 88.9% 42 59 71.2%
## 5 Donovan Mitche… 9 13 69.2% 64 116 55.2%
## 6 Tyler Herro 4 5 80.0% 56 84 66.7%
## 7 Jalen Duren 70 76 92.1% 111 152 73.0%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_13_bust
## # A tibble: 2 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Kendall Marsha… 0 0 0% 35 53 66.0%
## 2 Jerome Robinson 12 13 92.3% 98 157 62.4%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# fourteenth overall
df_pick_14 <- is_not_bust(14, df_top_players, df2)
df_pick_14_bust <- is_bust(14, df_bottom_players, df2)
df_pick_14
## # A tibble: 4 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Marcus Morris 31 33 93.9% 114 147 77.6%
## 2 T.J. Warren 37 37 100.0% 192 251 76.5%
## 3 Cameron Payne 3 3 100.0% 53 87 60.9%
## 4 Bam Adebayo 99 105 94.3% 138 185 74.6%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_14_bust
## # A tibble: 1 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Romeo Langford 10 14 71.4% 91 138 65.9%
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# combining dataframes
df_good <- bind_rows(df_pick_1, df_pick_2, df_pick_3, df_pick_4, df_pick_5, df_pick_6, df_pick_7, df_pick_8, df_pick_9,
df_pick_10, df_pick_11, df_pick_12, df_pick_13, df_pick_14)
df_busts <- bind_rows(df_pick_1_bust, df_pick_2_bust, df_pick_3_bust, df_pick_4_bust, df_pick_5_bust, df_pick_6_bust, df_pick_7_bust, df_pick_8_bust, df_pick_9_bust, df_pick_10_bust, df_pick_11_bust, df_pick_12_bust, df_pick_13_bust, df_pick_14_bust)
print(df_good, n = 20)
## # A tibble: 64 × 55
## player dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
## 1 John Wall 33 36 91.7% 116 182 63.7%
## 2 Kyrie Irving 0 0 0% 26 39 66.7%
## 3 Anthony Davis 96 98 98.0% 152 174 87.4%
## 4 Karl-Anthony … 22 24 91.7% 87 121 71.9%
## 5 Ben Simmons 56 61 91.8% 159 220 72.3%
## 6 Zion Williams… 72 79 91.1% 247 313 78.9%
## 7 Anthony Edwar… 27 27 100.0% 89 129 69.0%
## 8 D'Angelo Russ… 4 4 100.0% 70 110 63.6%
## 9 Brandon Ingram 17 17 100.0% 69 117 59.0%
## 10 Lonzo Ball 37 40 92.5% 94 120 78.3%
## 11 Ja Morant 28 31 90.3% 160 264 60.6%
## 12 Chet Holmgren 57 57 100.0% 105 125 84.0%
## 13 Bradley Beal 18 20 90.0% 89 137 65.0%
## 14 Joel Embiid 30 30 100.0% 80 99 80.8%
## 15 Jayson Tatum 18 21 85.7% 79 126 62.7%
## 16 Evan Mobley 63 66 95.5% 113 144 78.5%
## 17 Aaron Gordon 54 56 96.4% 137 198 69.2%
## 18 Jaren Jackson… 31 31 100.0% 61 93 65.6%
## 19 Scottie Barnes 19 21 90.5% 61 89 68.5%
## 20 Keegan Murray 63 67 94.0% 196 277 70.8%
## # ℹ 44 more rows
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## # `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## # mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## # fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## # fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>, …
df_good |> pull(player)
## [1] "John Wall" "Kyrie Irving"
## [3] "Anthony Davis" "Karl-Anthony Towns"
## [5] "Ben Simmons" "Zion Williamson"
## [7] "Anthony Edwards" "D'Angelo Russell"
## [9] "Brandon Ingram" "Lonzo Ball"
## [11] "Ja Morant" "Chet Holmgren"
## [13] "Bradley Beal" "Joel Embiid"
## [15] "Jayson Tatum" "Evan Mobley"
## [17] "Aaron Gordon" "Jaren Jackson Jr."
## [19] "Scottie Barnes" "Keegan Murray"
## [21] "DeMarcus Cousins" "De'Aaron Fox"
## [23] "Trae Young" "Damian Lillard"
## [25] "Nerlens Noel" "Marcus Smart"
## [27] "Buddy Hield" "Onyeka Okongwu"
## [29] "Julius Randle" "Jamal Murray"
## [31] "Lauri Markkanen" "Al-Farouq Aminu"
## [33] "Kentavious Caldwell-Pope" "Franz Wagner"
## [35] "Gordon Hayward" "Kemba Walker"
## [37] "Andre Drummond" "Trey Burke"
## [39] "Jakob Poeltl" "Paul George"
## [41] "CJ McCollum" "Elfrid Payton"
## [43] "Mikal Bridges" "Jalen Smith"
## [45] "Klay Thompson" "Myles Turner"
## [47] "Domantas Sabonis" "Shai Gilgeous-Alexander"
## [49] "Steven Adams" "Miles Bridges"
## [51] "Tyrese Haliburton" "Jalen Williams"
## [53] "Dereck Lively II" "Ed Davis"
## [55] "Kelly Olynyk" "Zach LaVine"
## [57] "Devin Booker" "Donovan Mitchell"
## [59] "Tyler Herro" "Jalen Duren"
## [61] "Marcus Morris" "T.J. Warren"
## [63] "Cameron Payne" "Bam Adebayo"
good_list <- df_good |> pull(player)
df_busts |> pull(player)
## [1] "Anthony Bennett" "Derrick Williams" "Jahlil Okafor" "Thomas Robinson"
## [5] "Ekpe Udoh" "Ben McLemore" "Kevin Knox" "Ziaire Williams"
## [9] "Johnny Davis" "James Bouknight" "Jett Howard" "Xavier Henry"
## [13] "Kendall Marshall" "Jerome Robinson" "Romeo Langford"
bust_list <- df_busts |> pull(player)
plot_good <- ggplot(df_good, aes(x = fg2_per_g, y = fg3_per_g)) +
geom_point(color = "green", size = 4, alpha = 0.5) +
geom_label_repel(data = df_good, aes(label = player), size = 1.5, max.overlaps = 20) +
labs(title = "CBB Shot Selection for Good Value NBA Lottery Picks", x = "2PT Field Goal Makes per game", y = "3PT Field Goal Makes per game") +
theme_bw()
plot_busts <- ggplot(df_busts, aes(x = fg2_per_g, y = fg3_per_g, label = player)) +
geom_point(color = "red", size = 4, alpha = 0.5) +
geom_label_repel(size = 3) +
labs(title = "CBB Shot Selection for NBA Lottery Busts", x = "2PT Field Goal Makes per game", y = "3PT Field Goal Makes per game") +
theme_bw()
plot_combined <- ggplot(df2, aes(x = fg2_per_g, y = fg3_per_g)) +
geom_point(aes(color = "Average value"), size = 4, alpha = 0.2) +
geom_point(data = df_busts, aes(color = "Bad value"), size = 4, alpha = 0.5) +
geom_point(data = df_good, aes(color = "Good value"), size = 4, alpha = 0.5) +
labs(title = "CBB Shot Selection for NBA Lottery Picks",
x = "2PT Field Goal Makes per game", y = "3PT Field Goal Makes per game", color = "Value") +
scale_color_manual(values = c("Bad value" = "red", "Good value" = "green", "Average value" = "grey")) +
theme_bw()
plot_combined
plot_busts
plot_good
library(corrr)
library(ggcorrplot)
library(FactoMineR)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
colnames(df2)
## [1] "player" "dunk_made" "dunk_attempts"
## [4] "dunk_pct" "rim_made" "rim_attempts"
## [7] "rim_pct" "rim_asted" "other2pt_made"
## [10] "other2pt_attempts" "other2pt_pct" "other2pt_asted"
## [13] "3pt_tot" "3pt_pct" "3pt_asted"
## [16] "games" "mp_per_g_college" "fg_per_g"
## [19] "fga_per_g" "fg_pct_college" "fg2_per_g"
## [22] "fg2a_per_g" "fg2_pct" "fg3_per_g"
## [25] "fg3a_per_g" "fg3_pct_college" "ft_per_g"
## [28] "fta_per_g" "ft_pct_college" "orb_per_g"
## [31] "drb_per_g" "trb_per_g_college" "ast_per_g_college"
## [34] "stl_per_g" "blk_per_g" "tov_per_g"
## [37] "pts_per_g_college" "pick_overall" "college_name"
## [40] "seasons" "g" "fg_pct_nba"
## [43] "fg3_pct_nba" "ft_pct_nba" "mp_per_g_nba"
## [46] "pts_per_g_nba" "trb_per_g_nba" "ast_per_g_nba"
## [49] "ws" "ws_per_48" "bpm"
## [52] "vorp" "year" "pra_per_g"
## [55] "vorp_per_g"
df3 <- column_to_rownames(df2, var = "player")
df_cbb <- df3 |> select(
dunk_made, dunk_attempts, dunk_pct, rim_made, rim_attempts, rim_pct, rim_asted,
other2pt_made, other2pt_attempts, other2pt_pct, other2pt_asted, fg2_pct, fg3_per_g, fg3a_per_g,
`3pt_asted`, games, ft_per_g, fta_per_g, ast_per_g_college, orb_per_g, drb_per_g,
stl_per_g, blk_per_g, tov_per_g, pts_per_g_college
) |> rename(fg3_asted = `3pt_asted`) |>
mutate(dunk_pct = parse_number(dunk_pct) / 100, # changing to percentages
rim_pct = parse_number(rim_pct) / 100,
rim_asted = parse_number(rim_asted) / 100,
other2pt_pct = parse_number(other2pt_pct) / 100,
other2pt_asted = parse_number(other2pt_asted) / 100,
fg3_asted = parse_number(fg3_asted) / 100,
fg3_pct_per_g = fg3_per_g / fg3a_per_g)
df_cbb <- df_cbb |>
mutate(fg3_pct_per_g = coalesce(fg3_pct_per_g, 0)) |>
relocate(fg3_pct_per_g, .after = fg3_asted)
to_per_game <- function(x, games) {
x <- x / games
return(x)
}
df_cbb <- df_cbb |> # making everything per game
mutate(across(c(dunk_made, dunk_attempts, rim_made, rim_attempts, other2pt_made,
other2pt_attempts), function(x) to_per_game(x, games)))
colSums(is.na(df_cbb))
## dunk_made dunk_attempts dunk_pct rim_made
## 0 0 0 0
## rim_attempts rim_pct rim_asted other2pt_made
## 0 0 0 0
## other2pt_attempts other2pt_pct other2pt_asted fg2_pct
## 0 0 0 0
## fg3_per_g fg3a_per_g fg3_asted fg3_pct_per_g
## 0 0 0 0
## games ft_per_g fta_per_g ast_per_g_college
## 0 0 0 0
## orb_per_g drb_per_g stl_per_g blk_per_g
## 0 0 0 0
## tov_per_g pts_per_g_college
## 0 0
Following this guide: https://www.datacamp.com/tutorial/pca-analysis-r
df_cbb_scaled <- as_tibble(scale(df_cbb |> select(-games)))
df_cbb_scaled
## # A tibble: 165 × 25
## dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct rim_asted
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.338 0.359 0.125 0.886 1.15 -0.582 -0.936
## 2 -0.789 -0.813 0.332 -0.925 -1.11 1.48 -1.47
## 3 1.25 1.23 0.325 1.46 1.11 1.46 0.662
## 4 0.599 0.655 0.0208 0.478 0.382 0.591 0.733
## 5 1.13 1.15 0.215 1.44 1.18 1.16 0.633
## 6 0.246 0.233 0.270 0.0605 -0.0262 0.521 -0.0688
## 7 -0.560 -0.589 0.387 0.233 0.367 -0.443 0.0432
## 8 0.102 0.0619 0.408 -0.246 -0.174 -0.443 0.0668
## 9 -0.635 -0.661 0.353 -0.687 -0.754 0.228 0.615
## 10 -0.619 -0.573 -0.560 -0.839 -0.861 -0.261 -0.623
## # ℹ 155 more rows
## # ℹ 18 more variables: other2pt_made <dbl>, other2pt_attempts <dbl>,
## # other2pt_pct <dbl>, other2pt_asted <dbl>, fg2_pct <dbl>, fg3_per_g <dbl>,
## # fg3a_per_g <dbl>, fg3_asted <dbl>, fg3_pct_per_g <dbl>, ft_per_g <dbl>,
## # fta_per_g <dbl>, ast_per_g_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>,
## # stl_per_g <dbl>, blk_per_g <dbl>, tov_per_g <dbl>, pts_per_g_college <dbl>
# df_cbb_scaled$player <- df_cbb$player
# df_cbb_scaled <- df_cbb_scaled |> relocate(player, .before = dunk_made)
corr_matrix <- cor(df_cbb_scaled)
colnames(corr_matrix)
## [1] "dunk_made" "dunk_attempts" "dunk_pct"
## [4] "rim_made" "rim_attempts" "rim_pct"
## [7] "rim_asted" "other2pt_made" "other2pt_attempts"
## [10] "other2pt_pct" "other2pt_asted" "fg2_pct"
## [13] "fg3_per_g" "fg3a_per_g" "fg3_asted"
## [16] "fg3_pct_per_g" "ft_per_g" "fta_per_g"
## [19] "ast_per_g_college" "orb_per_g" "drb_per_g"
## [22] "stl_per_g" "blk_per_g" "tov_per_g"
## [25] "pts_per_g_college"
ggcorrplot(corr_matrix, method = "square")
ggcorrplot(corr_matrix, method = "square", hc.order = TRUE,
type = "lower", tl.cex = 7, title = "Correlations between different college statistics")
Kmeans Clustering: https://medium.com/@zullinira23/implementation-of-principal-component-analysis-pca-on-k-means-clustering-in-r-794f03ec15f
df_cbb.sample <- df_cbb |> sample_frac(1, replace = FALSE)
head(df_cbb.sample)
## dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## Mo Bamba 2.1666667 2.2666667 0.956 3.633333 4.766667 0.762
## Jaxson Hayes 2.3125000 2.3437500 0.987 3.281250 3.843750 0.854
## Harrison Barnes 0.4533333 0.4533333 1.000 0.880000 1.200000 0.733
## Thomas Robinson 0.6666667 0.7904762 0.843 1.609524 2.495238 0.645
## Brandon Miller 0.6216216 0.7027027 0.885 2.297297 3.972973 0.578
## Anthony Bennett 1.5142857 1.6571429 0.914 2.857143 4.000000 0.714
## rim_asted other2pt_made other2pt_attempts other2pt_pct
## Mo Bamba 0.532 0.7666667 2.500000 0.307
## Jaxson Hayes 0.829 0.5625000 1.437500 0.391
## Harrison Barnes 0.455 1.5066667 3.906667 0.386
## Thomas Robinson 0.627 0.7619048 2.180952 0.349
## Brandon Miller 0.259 0.8378378 2.513514 0.333
## Anthony Bennett 0.620 1.6000000 3.685714 0.434
## other2pt_asted fg2_pct fg3_per_g fg3a_per_g fg3_asted
## Mo Bamba 0.217 0.603 0.5 1.7 0.857
## Jaxson Hayes 0.333 0.728 0.0 0.0 0.000
## Harrison Barnes 0.204 0.469 1.5 4.4 0.837
## Thomas Robinson 0.500 0.525 0.1 0.1 0.857
## Brandon Miller 0.032 0.483 2.9 7.5 0.830
## Anthony Bennett 0.768 0.587 1.0 2.7 0.972
## fg3_pct_per_g games ft_per_g fta_per_g ast_per_g_college
## Mo Bamba 0.2941176 30 2.7 4.0 0.5
## Jaxson Hayes 0.0000000 32 2.3 3.1 0.3
## Harrison Barnes 0.3409091 75 3.1 4.3 1.3
## Thomas Robinson 1.0000000 105 2.2 3.6 1.0
## Brandon Miller 0.3866667 37 3.9 4.6 2.1
## Anthony Bennett 0.3703704 35 3.5 5.1 1.0
## orb_per_g drb_per_g stl_per_g blk_per_g tov_per_g
## Mo Bamba 3.2 7.3 0.8 3.7 1.5
## Jaxson Hayes 1.8 3.3 0.6 2.2 0.9
## Harrison Barnes 2.0 3.5 0.9 0.4 1.9
## Thomas Robinson 2.1 5.2 0.6 0.7 1.6
## Brandon Miller 2.1 6.2 0.9 0.9 2.2
## Anthony Bennett 2.5 5.7 0.7 1.2 1.9
## pts_per_g_college
## Mo Bamba 12.9
## Jaxson Hayes 10.0
## Harrison Barnes 16.3
## Thomas Robinson 9.8
## Brandon Miller 18.8
## Anthony Bennett 16.1
df_cbb.pca <- PCA(df_cbb.sample |> select(-games), scale.unit = TRUE, graph = FALSE)
fviz_eig(df_cbb.pca, addlabels = TRUE, main = "Statistics Represented in Lower Dimensional Components")
fviz_pca_var(df_cbb.pca, col.var = "cos2", gradient.cols = c("lightblue", "black"), repel = TRUE)
var <- get_pca_var(df_cbb.pca)
fviz_cos2(df_cbb.pca, choice = "var", axes = 1:2) +
labs(title = "Quality of Representation to PCA Dimensions 1 and 2")
fviz_pca_ind(df_cbb.pca, repel = TRUE, labelsize = 1) +
labs(title = "NBA Lottery Picks on PCA Dimensions 1 and 2")
summary(df_cbb.pca)
##
## Call:
## PCA(X = select(df_cbb.sample, -games), scale.unit = TRUE, graph = FALSE)
##
##
## Eigenvalues
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5 Dim.6 Dim.7
## Variance 7.916 5.341 2.094 1.495 1.261 1.201 1.002
## % of var. 31.663 21.364 8.376 5.978 5.045 4.803 4.008
## Cumulative % of var. 31.663 53.027 61.403 67.381 72.426 77.229 81.237
## Dim.8 Dim.9 Dim.10 Dim.11 Dim.12 Dim.13 Dim.14
## Variance 0.967 0.751 0.556 0.446 0.420 0.327 0.267
## % of var. 3.867 3.006 2.225 1.786 1.679 1.310 1.067
## Cumulative % of var. 85.104 88.110 90.335 92.121 93.800 95.110 96.177
## Dim.15 Dim.16 Dim.17 Dim.18 Dim.19 Dim.20 Dim.21
## Variance 0.258 0.213 0.164 0.141 0.072 0.062 0.024
## % of var. 1.032 0.850 0.655 0.563 0.286 0.249 0.097
## Cumulative % of var. 97.209 98.059 98.714 99.277 99.564 99.813 99.910
## Dim.22 Dim.23 Dim.24 Dim.25
## Variance 0.011 0.006 0.004 0.002
## % of var. 0.046 0.023 0.014 0.007
## Cumulative % of var. 99.955 99.979 99.993 100.000
##
## Individuals (the 10 first)
## Dist Dim.1 ctr cos2 Dim.2 ctr cos2
## Mo Bamba | 6.240 | 5.048 1.951 0.655 | 0.723 0.059 0.013 |
## Jaxson Hayes | 7.789 | 6.143 2.890 0.622 | -1.826 0.378 0.055 |
## Harrison Barnes | 3.113 | -1.045 0.084 0.113 | -0.397 0.018 0.016 |
## Thomas Robinson | 5.940 | 0.643 0.032 0.012 | -2.155 0.527 0.132 |
## Brandon Miller | 4.490 | -2.143 0.352 0.228 | 1.766 0.354 0.155 |
## Anthony Bennett | 4.389 | 3.010 0.694 0.470 | 0.997 0.113 0.052 |
## Steven Adams | 6.140 | 3.716 1.057 0.366 | -2.840 0.915 0.214 |
## Nerlens Noel | 7.353 | 5.366 2.205 0.533 | 0.882 0.088 0.014 |
## Cameron Johnson | 3.881 | -1.383 0.146 0.127 | -2.260 0.579 0.339 |
## Dennis Smith Jr. | 5.451 | -2.658 0.541 0.238 | 3.803 1.641 0.487 |
## Dim.3 ctr cos2
## Mo Bamba -0.148 0.006 0.001 |
## Jaxson Hayes -2.273 1.495 0.085 |
## Harrison Barnes 1.752 0.888 0.317 |
## Thomas Robinson 1.695 0.831 0.081 |
## Brandon Miller 0.902 0.235 0.040 |
## Anthony Bennett 2.370 1.626 0.292 |
## Steven Adams -2.476 1.774 0.163 |
## Nerlens Noel -3.100 2.781 0.178 |
## Cameron Johnson 1.558 0.702 0.161 |
## Dennis Smith Jr. -1.993 1.150 0.134 |
##
## Variables (the 10 first)
## Dim.1 ctr cos2 Dim.2 ctr cos2 Dim.3 ctr
## dunk_made | 0.835 8.802 0.697 | 0.336 2.117 0.113 | -0.025 0.029
## dunk_attempts | 0.829 8.675 0.687 | 0.338 2.140 0.114 | -0.025 0.030
## dunk_pct | 0.334 1.412 0.112 | -0.120 0.269 0.014 | 0.228 2.485
## rim_made | 0.584 4.305 0.341 | 0.672 8.468 0.452 | -0.088 0.370
## rim_attempts | 0.451 2.571 0.203 | 0.740 10.243 0.547 | -0.093 0.410
## rim_pct | 0.724 6.625 0.524 | -0.148 0.409 0.022 | -0.001 0.000
## rim_asted | 0.767 7.441 0.589 | -0.354 2.351 0.126 | 0.182 1.578
## other2pt_made | 0.106 0.141 0.011 | 0.601 6.772 0.362 | 0.560 14.964
## other2pt_attempts | 0.102 0.131 0.010 | 0.651 7.924 0.423 | 0.484 11.203
## other2pt_pct | 0.007 0.001 0.000 | -0.045 0.039 0.002 | 0.359 6.140
## cos2
## dunk_made 0.001 |
## dunk_attempts 0.001 |
## dunk_pct 0.052 |
## rim_made 0.008 |
## rim_attempts 0.009 |
## rim_pct 0.000 |
## rim_asted 0.033 |
## other2pt_made 0.313 |
## other2pt_attempts 0.235 |
## other2pt_pct 0.129 |
pca2 <- prcomp(df_cbb.sample, center = TRUE, scale = TRUE)
summary(pca2)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.8233 2.3577 1.45399 1.29215 1.18882 1.10678 1.00292
## Proportion of Variance 0.3066 0.2138 0.08131 0.06422 0.05436 0.04711 0.03869
## Cumulative Proportion 0.3066 0.5204 0.60167 0.66589 0.72025 0.76736 0.80605
## PC8 PC9 PC10 PC11 PC12 PC13 PC14
## Standard deviation 0.98328 0.87000 0.75739 0.66985 0.65034 0.64385 0.56902
## Proportion of Variance 0.03719 0.02911 0.02206 0.01726 0.01627 0.01594 0.01245
## Cumulative Proportion 0.84323 0.87234 0.89441 0.91166 0.92793 0.94388 0.95633
## PC15 PC16 PC17 PC18 PC19 PC20 PC21
## Standard deviation 0.51470 0.49095 0.41133 0.40183 0.37408 0.26745 0.2038
## Proportion of Variance 0.01019 0.00927 0.00651 0.00621 0.00538 0.00275 0.0016
## Cumulative Proportion 0.96652 0.97579 0.98230 0.98851 0.99389 0.99664 0.9982
## PC22 PC23 PC24 PC25 PC26
## Standard deviation 0.1527 0.10681 0.07652 0.05949 0.04172
## Proportion of Variance 0.0009 0.00044 0.00023 0.00014 0.00007
## Cumulative Proportion 0.9991 0.99957 0.99980 0.99993 1.00000
df_cluster <- as.data.frame(-pca2$x[, 1:2])
df_cluster
## PC1 PC2
## Mo Bamba 5.12168936 -0.6472841174
## Jaxson Hayes 6.10602196 1.9333235932
## Harrison Barnes -1.11071750 0.4675508975
## Thomas Robinson 0.41276682 2.4752367530
## Brandon Miller -2.00076132 -1.9889065138
## Anthony Bennett 3.08683149 -0.9658417837
## Steven Adams 3.66839570 2.7680962536
## Nerlens Noel 5.45383914 -0.7991793564
## Cameron Johnson -1.47106716 2.1704255345
## Dennis Smith Jr. -2.43263222 -4.0080211081
## Wes Johnson 1.50316126 -1.3047269862
## Justise Winslow -0.36808543 -0.0140660045
## Jordan Hawkins -3.13721836 2.6918367917
## Alex Len 3.60602390 2.2253867363
## Malik Monk -2.28778612 -1.6797126865
## Nik Stauskas -3.13687982 1.8802991525
## Derrick Favors 5.01881131 -0.5372658952
## Tyler Herro -1.81846173 0.4356215772
## CJ McCollum -3.90112719 -0.5500261983
## Devin Booker -1.28579904 3.5063787159
## Evan Turner -1.49673522 -0.4578015058
## Julius Randle 2.20418418 -3.0357101236
## Ben Simmons 3.55778734 -6.3959006442
## Franz Wagner -1.24385093 2.2215940450
## Kevin Knox -0.63604976 -0.7501807107
## Aaron Gordon 2.71643469 -0.5168844660
## Jonathan Isaac 1.48927680 0.4512240082
## Trey Burke -3.60982350 -0.5620071724
## Trae Young -6.66738911 -7.9875515113
## Jalen Suggs -1.57680464 -1.3826079667
## Victor Oladipo -0.63903222 2.5888631078
## Jarace Walker 1.30148259 1.2591517315
## Joel Embiid 4.49751364 0.3052975967
## James Bouknight -1.52674532 0.8368198759
## Bennedict Mathurin -1.07663046 1.0805356809
## Josh Jackson 1.48957168 -3.0475464620
## Lauri Markkanen -0.18748839 0.1368727294
## Jabari Parker 2.40112005 -3.8466488862
## Noah Vonleh 1.11130925 -0.1751834144
## Miles Bridges -0.51099544 0.8957909493
## Zach Collins 2.23468578 2.2257336106
## Deandre Ayton 6.32878662 -3.9424880892
## Frank Kaminsky -0.20730366 4.1181677131
## Jabari Smith Jr. -1.62886867 -1.4477810399
## Terrence Ross -1.18817176 2.5860223513
## Otto Porter Jr. 0.04885497 1.8356037020
## Cody Zeller 2.02496946 -0.5680230150
## De'Aaron Fox -0.87066774 -3.3180112387
## Jaren Jackson Jr. 1.49089242 1.7016335898
## D'Angelo Russell -2.98270993 -2.6314533501
## Anthony Edwards -1.40251473 -2.6178813170
## Chris Duarte -1.92568686 1.3515954340
## DeMarcus Cousins 4.42466023 -2.5278733794
## P.J. Washington 0.33949853 1.0835362435
## Davion Mitchell -3.51406172 1.6468468321
## Bradley Beal -0.88415187 -0.6225122311
## Elfrid Payton -2.00506164 -0.8904543832
## Derrick Williams 1.03830203 -1.0304451116
## Wendell Carter Jr. 3.36498747 -0.7216499359
## Willie Cauley-Stein 2.65175786 4.0213075524
## Kendall Marshall -4.60320232 1.8524037126
## Patrick Patterson 2.53963602 2.1109010563
## Kentavious Caldwell-Pope -2.25315004 0.8810443210
## Michael Carter-Williams -2.91272861 0.8684079705
## Jamal Murray -2.37633667 -1.7128111826
## Shai Gilgeous-Alexander -2.04551424 -2.3234644917
## Gordon Hayward -0.68966882 0.8690154013
## Marvin Bagley III 5.65427831 -4.5448753922
## Dion Waiters -1.92316128 2.9143303356
## Kris Dunn -2.69748838 0.0951825819
## Collin Sexton -2.52183153 -3.9991906129
## Ben McLemore 0.24239908 -0.1870327074
## Doug McDermott -1.01124840 1.4023518631
## Myles Turner 1.86401362 1.8030642874
## Paul George -2.10747433 0.1876406410
## Austin Rivers -2.95329868 -1.3791937733
## Devin Vassell -0.72773722 4.1046446591
## Anthony Black -1.20227004 -1.7148572319
## Damian Lillard -4.29694802 -0.3843340708
## Cole Aldrich 2.78307370 3.9300845772
## Kyrie Irving -3.77122159 -3.0033308087
## Xavier Henry -1.24518982 0.8915202286
## Cameron Payne -4.48285978 -1.3236954751
## Dereck Lively II 5.06532407 4.9338352381
## Taurean Prince -1.44792106 3.7467662602
## Kira Lewis Jr. -3.26278428 -0.0954317440
## Anthony Davis 6.73630616 -1.2629548886
## Jeremy Lamb -0.50899046 2.6527408299
## Denzel Valentine -3.08887719 3.2154062834
## Michael Kidd-Gilchrist 1.49000302 -0.5168492854
## Meyers Leonard 2.74024056 3.6508169349
## De'Andre Hunter -0.77514281 2.0904369415
## Donovan Mitchell -3.15753551 1.8102604526
## Taylor Hendricks 1.15461505 0.2239571117
## Jaden Ivey -1.86477689 0.0773337674
## Buddy Hield -3.54777620 1.4535258538
## Bam Adebayo 5.99686501 -1.1625539440
## Greg Monroe 0.55772278 -0.6192425231
## Jalen Williams -1.80913049 1.6958213307
## Onyeka Okongwu 5.79890613 -2.6047417619
## Stanley Johnson -1.16931864 -1.1296582673
## Jakob Poeltl 3.07463033 1.0055357287
## Al-Farouq Aminu 1.02295866 -0.6557701582
## Gradey Dick -1.44740283 1.2927758040
## Ochai Agbaji -1.65972813 3.1665060860
## Jalen Duren 5.98165537 -0.5776256581
## Andre Drummond 5.95363392 0.5511351054
## Jett Howard -2.91653599 1.5725624278
## Tyrese Haliburton -2.28374305 2.7467761148
## Chet Holmgren 4.15750678 0.0605690899
## Ed Davis 3.31971345 3.1931177330
## Kelly Olynyk 0.73492325 3.8811181762
## Alec Burks -0.74320461 -2.0264013166
## Mikal Bridges -1.20162590 3.5903451628
## Andrew Wiggins 0.36542307 -2.6025198229
## Markieff Morris 1.29699896 4.0373945397
## RJ Barrett -0.08315247 -5.0528618008
## Johnny Davis -1.33669032 0.1478916084
## Obi Toppin 3.01791562 0.9098400603
## Tristan Thompson 4.60556911 -2.1556621458
## Patrick Williams -0.12807727 1.3341060184
## John Henson 2.97155356 3.1904829161
## Keegan Murray 1.59473589 0.7127863270
## Jalen Smith 2.10897015 1.8201712718
## Romeo Langford -0.73466257 -2.2444498661
## Karl-Anthony Towns 2.08103954 1.1196065654
## Trey Lyles 2.26213434 2.3189123450
## Kemba Walker -3.18777727 -0.5896508288
## Luke Kennard -2.76829877 0.9772815337
## Shabazz Muhammad 0.36507564 -2.1338042145
## Marcus Morris 0.73895771 2.6323130400
## Zion Williamson 5.07447989 -4.6504795504
## Cason Wallace -2.34104930 0.2152815400
## Jahlil Okafor 5.47754196 -2.7772955807
## Moses Moody -1.46960252 -1.4190061757
## Joshua Primo -1.55551858 3.7707023835
## Klay Thompson -3.84131304 0.2800845197
## Domantas Sabonis 1.67699707 1.2190984214
## Coby White -3.35981962 -0.9338304491
## Jimmer Fredette -4.12885438 0.1532925420
## John Wall -1.73494610 -3.9455798842
## Aaron Nesmith -3.28346405 1.6855751483
## Lonzo Ball -0.30972840 0.0007053551
## Jeremy Sochan 0.86229364 1.9652843807
## Brandon Knight -3.54151495 -2.0093567331
## Scottie Barnes -0.41005879 0.0002825293
## Ja Morant -2.49001353 -3.1186017067
## Cade Cunningham -2.90919016 -4.4345298643
## Isaac Okoro 0.42993503 -0.2975331821
## Jaylen Brown -0.86593793 -2.2952001302
## Jayson Tatum -0.78010288 -2.6444271533
## Jarrett Culver -1.51221150 0.0681535683
## Rui Hachimura 0.89990828 2.6695528608
## Brandon Ingram -1.66287033 -1.8607679236
## Cam Reddish -4.14985479 0.0842589880
## Markelle Fultz -2.05054203 -6.2356705555
## Jerome Robinson -3.05834301 0.2586574385
## T.J. Warren 1.08912829 -0.3669642309
## Ekpe Udoh 2.87709283 -2.0867754797
## Zach LaVine -1.60422247 2.6481909219
## Marcus Smart -2.59113529 -1.9709358714
## Ziaire Williams -1.78971475 0.2486334187
## Evan Mobley 4.26687599 -2.6519044433
## Marquese Chriss 2.77746275 -0.4521718014
## Paolo Banchero 0.71365922 -2.7656057765
fviz_nbclust(df_cluster, kmeans, method = 'wss')
fviz_nbclust(df_cluster, kmeans, method = 'silhouette')
fviz_nbclust(df_cluster, kmeans, method = 'gap_stat')
k <- 15
df_cbb.kmeans <- kmeans(df_cluster, centers = k, nstart = 50)
df_cbb.kmeans2 <- kmeans(df_cluster, centers = 10, nstart = 50)
df_cbb.kmeans3 <- kmeans(df_cluster, centers = 5, nstart = 50)
df_cbb.kmeans
## K-means clustering with 15 clusters of sizes 1, 18, 8, 7, 17, 8, 15, 14, 11, 13, 13, 9, 13, 8, 10
##
## Cluster means:
## PC1 PC2
## 1 -6.6673891 -7.9875515
## 2 -1.5229678 -1.9259630
## 3 0.5984250 2.9518339
## 4 1.0651259 -3.2120996
## 5 -1.6278245 0.6366335
## 6 3.3507603 3.4891410
## 7 -1.3010475 2.9669816
## 8 -3.6051291 -0.4968607
## 9 -3.2787799 1.7867642
## 10 -0.1965191 0.1407494
## 11 1.9428659 -0.7028714
## 12 5.0209873 -3.5834690
## 13 2.0566469 1.4352045
## 14 -2.6116359 -3.9220472
## 15 5.5023843 -0.2136538
##
## Clustering vector:
## Mo Bamba Jaxson Hayes Harrison Barnes
## 15 15 5
## Thomas Robinson Brandon Miller Anthony Bennett
## 3 2 11
## Steven Adams Nerlens Noel Cameron Johnson
## 6 15 7
## Dennis Smith Jr. Wes Johnson Justise Winslow
## 14 11 10
## Jordan Hawkins Alex Len Malik Monk
## 9 6 2
## Nik Stauskas Derrick Favors Tyler Herro
## 9 15 5
## CJ McCollum Devin Booker Evan Turner
## 8 7 5
## Julius Randle Ben Simmons Franz Wagner
## 4 12 7
## Kevin Knox Aaron Gordon Jonathan Isaac
## 10 11 13
## Trey Burke Trae Young Jalen Suggs
## 8 1 2
## Victor Oladipo Jarace Walker Joel Embiid
## 7 13 15
## James Bouknight Bennedict Mathurin Josh Jackson
## 5 5 4
## Lauri Markkanen Jabari Parker Noah Vonleh
## 10 4 11
## Miles Bridges Zach Collins Deandre Ayton
## 10 13 12
## Frank Kaminsky Jabari Smith Jr. Terrence Ross
## 3 2 7
## Otto Porter Jr. Cody Zeller De'Aaron Fox
## 3 11 2
## Jaren Jackson Jr. D'Angelo Russell Anthony Edwards
## 13 14 2
## Chris Duarte DeMarcus Cousins P.J. Washington
## 5 12 10
## Davion Mitchell Bradley Beal Elfrid Payton
## 9 10 2
## Derrick Williams Wendell Carter Jr. Willie Cauley-Stein
## 11 11 6
## Kendall Marshall Patrick Patterson Kentavious Caldwell-Pope
## 9 13 5
## Michael Carter-Williams Jamal Murray Shai Gilgeous-Alexander
## 9 2 2
## Gordon Hayward Marvin Bagley III Dion Waiters
## 10 12 7
## Kris Dunn Collin Sexton Ben McLemore
## 8 14 10
## Doug McDermott Myles Turner Paul George
## 5 13 5
## Austin Rivers Devin Vassell Anthony Black
## 8 7 2
## Damian Lillard Cole Aldrich Kyrie Irving
## 8 6 14
## Xavier Henry Cameron Payne Dereck Lively II
## 5 8 6
## Taurean Prince Kira Lewis Jr. Anthony Davis
## 7 8 15
## Jeremy Lamb Denzel Valentine Michael Kidd-Gilchrist
## 7 9 11
## Meyers Leonard De'Andre Hunter Donovan Mitchell
## 6 7 9
## Taylor Hendricks Jaden Ivey Buddy Hield
## 11 5 9
## Bam Adebayo Greg Monroe Jalen Williams
## 15 10 5
## Onyeka Okongwu Stanley Johnson Jakob Poeltl
## 12 2 13
## Al-Farouq Aminu Gradey Dick Ochai Agbaji
## 11 5 7
## Jalen Duren Andre Drummond Jett Howard
## 15 15 9
## Tyrese Haliburton Chet Holmgren Ed Davis
## 7 15 6
## Kelly Olynyk Alec Burks Mikal Bridges
## 3 2 7
## Andrew Wiggins Markieff Morris RJ Barrett
## 4 3 4
## Johnny Davis Obi Toppin Tristan Thompson
## 5 13 12
## Patrick Williams John Henson Keegan Murray
## 10 6 13
## Jalen Smith Romeo Langford Karl-Anthony Towns
## 13 2 13
## Trey Lyles Kemba Walker Luke Kennard
## 13 8 9
## Shabazz Muhammad Marcus Morris Zion Williamson
## 4 3 12
## Cason Wallace Jahlil Okafor Moses Moody
## 5 12 2
## Joshua Primo Klay Thompson Domantas Sabonis
## 7 8 13
## Coby White Jimmer Fredette John Wall
## 8 8 14
## Aaron Nesmith Lonzo Ball Jeremy Sochan
## 9 10 3
## Brandon Knight Scottie Barnes Ja Morant
## 8 10 14
## Cade Cunningham Isaac Okoro Jaylen Brown
## 14 10 2
## Jayson Tatum Jarrett Culver Rui Hachimura
## 2 5 3
## Brandon Ingram Cam Reddish Markelle Fultz
## 2 8 14
## Jerome Robinson T.J. Warren Ekpe Udoh
## 8 11 11
## Zach LaVine Marcus Smart Ziaire Williams
## 7 2 5
## Evan Mobley Marquese Chriss Paolo Banchero
## 12 11 4
##
## Within cluster sum of squares by cluster:
## [1] 0.000000 12.092124 7.703117 11.267129 8.308983 9.318869 9.151351
## [8] 10.282343 7.057748 8.272202 13.467131 21.895704 8.222699 11.493769
## [15] 14.072811
## (between_SS / total_SS = 93.1 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
fviz_cluster(df_cbb.kmeans, df_cluster, labelsize = 4, pointsize = 1, show.clust.cent = FALSE, repel = TRUE,
xlab = "Dimension 1", ylab = "Dimension 2", main = "Clustering with K-means") + scale_y_reverse()
cluster_assignments <- df_cbb.kmeans$cluster
cluster_df15 <- data.frame(value = cluster_assignments, name = names(cluster_assignments))
cluster_assignments2 <- df_cbb.kmeans2$cluster
cluster_df10 <- data.frame(value = cluster_assignments2, name = names(cluster_assignments2))
cluster_assignments3 <- df_cbb.kmeans3$cluster
cluster_df5 <- data.frame(value = cluster_assignments3, name = names(cluster_assignments3))
cluster_df15 <- as.tibble(cluster_df15)
## Warning: `as.tibble()` was deprecated in tibble 2.0.0.
## ℹ Please use `as_tibble()` instead.
## ℹ The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
cluster_df10 <- as.tibble(cluster_df10)
cluster_df5 <- as.tibble(cluster_df5)
cluster_df15 <- cluster_df15 |> rename(pc_cluster = value) |>
relocate(pc_cluster, .after = name)
cluster_df10 <- cluster_df10 |> rename(pc_cluster = value) |>
relocate(pc_cluster, .after = name)
cluster_df5 <- cluster_df5 |> rename(pc_cluster = value) |>
relocate(pc_cluster, .after = name)
cluster_df15
## # A tibble: 165 × 2
## name pc_cluster
## <chr> <int>
## 1 Mo Bamba 15
## 2 Jaxson Hayes 15
## 3 Harrison Barnes 5
## 4 Thomas Robinson 3
## 5 Brandon Miller 2
## 6 Anthony Bennett 11
## 7 Steven Adams 6
## 8 Nerlens Noel 15
## 9 Cameron Johnson 7
## 10 Dennis Smith Jr. 14
## # ℹ 155 more rows
df_cbb.sample$group <- df_cbb.kmeans$cluster
df_cbb.sample <- df_cbb.sample |> relocate(group, .before = dunk_made)
head(df_cbb.sample)
## group dunk_made dunk_attempts dunk_pct rim_made rim_attempts
## Mo Bamba 15 2.1666667 2.2666667 0.956 3.633333 4.766667
## Jaxson Hayes 15 2.3125000 2.3437500 0.987 3.281250 3.843750
## Harrison Barnes 5 0.4533333 0.4533333 1.000 0.880000 1.200000
## Thomas Robinson 3 0.6666667 0.7904762 0.843 1.609524 2.495238
## Brandon Miller 2 0.6216216 0.7027027 0.885 2.297297 3.972973
## Anthony Bennett 11 1.5142857 1.6571429 0.914 2.857143 4.000000
## rim_pct rim_asted other2pt_made other2pt_attempts other2pt_pct
## Mo Bamba 0.762 0.532 0.7666667 2.500000 0.307
## Jaxson Hayes 0.854 0.829 0.5625000 1.437500 0.391
## Harrison Barnes 0.733 0.455 1.5066667 3.906667 0.386
## Thomas Robinson 0.645 0.627 0.7619048 2.180952 0.349
## Brandon Miller 0.578 0.259 0.8378378 2.513514 0.333
## Anthony Bennett 0.714 0.620 1.6000000 3.685714 0.434
## other2pt_asted fg2_pct fg3_per_g fg3a_per_g fg3_asted
## Mo Bamba 0.217 0.603 0.5 1.7 0.857
## Jaxson Hayes 0.333 0.728 0.0 0.0 0.000
## Harrison Barnes 0.204 0.469 1.5 4.4 0.837
## Thomas Robinson 0.500 0.525 0.1 0.1 0.857
## Brandon Miller 0.032 0.483 2.9 7.5 0.830
## Anthony Bennett 0.768 0.587 1.0 2.7 0.972
## fg3_pct_per_g games ft_per_g fta_per_g ast_per_g_college
## Mo Bamba 0.2941176 30 2.7 4.0 0.5
## Jaxson Hayes 0.0000000 32 2.3 3.1 0.3
## Harrison Barnes 0.3409091 75 3.1 4.3 1.3
## Thomas Robinson 1.0000000 105 2.2 3.6 1.0
## Brandon Miller 0.3866667 37 3.9 4.6 2.1
## Anthony Bennett 0.3703704 35 3.5 5.1 1.0
## orb_per_g drb_per_g stl_per_g blk_per_g tov_per_g
## Mo Bamba 3.2 7.3 0.8 3.7 1.5
## Jaxson Hayes 1.8 3.3 0.6 2.2 0.9
## Harrison Barnes 2.0 3.5 0.9 0.4 1.9
## Thomas Robinson 2.1 5.2 0.6 0.7 1.6
## Brandon Miller 2.1 6.2 0.9 0.9 2.2
## Anthony Bennett 2.5 5.7 0.7 1.2 1.9
## pts_per_g_college
## Mo Bamba 12.9
## Jaxson Hayes 10.0
## Harrison Barnes 16.3
## Thomas Robinson 9.8
## Brandon Miller 18.8
## Anthony Bennett 16.1
df_cbb.sample |> group_by(group) |>
summarize(across(everything(), mean)) |> print(n = 15, width = Inf)
## # A tibble: 15 × 27
## group dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 0 0 0 3.28 6.28 0.522
## 2 2 0.468 0.514 0.912 2.30 3.65 0.633
## 3 3 0.380 0.431 0.906 1.34 1.86 0.727
## 4 4 1.17 1.28 0.915 3.34 5.06 0.662
## 5 5 0.329 0.354 0.931 1.40 2.12 0.668
## 6 6 0.784 0.833 0.943 1.49 2.04 0.745
## 7 7 0.271 0.301 0.914 1.04 1.55 0.682
## 8 8 0.108 0.125 0.899 1.18 1.98 0.602
## 9 9 0.124 0.147 0.785 0.814 1.35 0.608
## 10 10 0.632 0.708 0.897 2.16 3.19 0.676
## 11 11 1.03 1.12 0.931 2.59 3.77 0.687
## 12 12 2.02 2.14 0.943 4.93 6.48 0.760
## 13 13 0.754 0.815 0.921 2.11 2.90 0.734
## 14 14 0.387 0.428 0.770 2.70 4.35 0.624
## 15 15 2.06 2.16 0.959 3.51 4.49 0.787
## rim_asted other2pt_made other2pt_attempts other2pt_pct other2pt_asted fg2_pct
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.114 1.19 2.78 0.427 0.026 0.493
## 2 0.309 1.31 3.56 0.362 0.161 0.501
## 3 0.589 0.717 1.69 0.432 0.437 0.581
## 4 0.440 1.90 5.17 0.365 0.291 0.511
## 5 0.387 0.894 2.39 0.369 0.242 0.509
## 6 0.705 0.636 1.65 0.351 0.633 0.581
## 7 0.468 0.616 1.50 0.397 0.296 0.546
## 8 0.238 0.887 2.31 0.380 0.186 0.476
## 9 0.266 0.578 1.45 0.394 0.146 0.487
## 10 0.441 0.823 2.19 0.368 0.289 0.560
## 11 0.515 1.24 3.26 0.381 0.444 0.555
## 12 0.539 1.63 3.99 0.415 0.369 0.622
## 13 0.567 0.888 2.14 0.416 0.491 0.599
## 14 0.221 1.52 4.03 0.37 0.083 0.509
## 15 0.634 0.905 2.59 0.357 0.469 0.630
## fg3_per_g fg3a_per_g fg3_asted fg3_pct_per_g games ft_per_g fta_per_g
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 3.7 10.3 0.263 0.359 32 7.4 8.6
## 2 1.52 4.29 0.731 0.342 41.6 4.07 5.38
## 3 0.45 1.31 0.935 0.414 95.5 2.31 3.35
## 4 1.1 3.21 0.891 0.331 36.3 4.13 5.86
## 5 1.55 4.28 0.725 0.359 61.7 2.88 3.68
## 6 0.0125 0.125 0.375 0.0312 72.1 1.62 2.7
## 7 1.4 3.69 0.870 0.379 73.3 1.75 2.27
## 8 1.95 5.3 0.614 0.367 78.4 3.69 4.69
## 9 1.86 4.86 0.744 0.378 75.2 2.31 2.91
## 10 1.28 3.44 0.882 0.353 44 3 4.08
## 11 0.654 1.78 0.865 0.332 45.5 3.44 4.92
## 12 0.233 0.733 0.624 0.149 34.1 4.01 6.38
## 13 0.546 1.54 0.797 0.346 52.8 2.73 3.7
## 14 1.75 4.56 0.479 0.379 33.1 4.84 6.25
## 15 0.19 0.58 0.364 0.0888 32.3 2.62 4.11
## ast_per_g_college orb_per_g drb_per_g stl_per_g blk_per_g tov_per_g
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 8.7 0.4 3.5 1.7 0.3 5.2
## 2 2.94 1.33 4.29 1.38 0.633 2.47
## 3 1.32 1.79 4.08 0.812 0.712 1.39
## 4 2.2 2.43 5.14 1.03 0.786 2.44
## 5 2.25 1.19 4.08 1.26 0.482 2.04
## 6 0.8 2.28 4.39 0.538 2.09 1.26
## 7 1.76 1.06 2.99 1.06 0.493 1.35
## 8 3.81 0.693 3.34 1.41 0.364 2.7
## 9 3.18 0.818 3.04 1.07 0.382 1.79
## 10 2.49 1.55 4.55 1.1 0.838 2.08
## 11 1.48 2.68 5.21 1.02 1.47 2.05
## 12 1.9 3.49 6.14 1.13 1.84 2.3
## 13 1.17 2.14 4.99 0.677 1.63 1.54
## 14 5.4 0.95 4 1.52 0.55 3.32
## 15 1.05 2.73 5.84 0.98 2.97 1.75
## pts_per_g_college
## <dbl>
## 1 27.4
## 2 16.6
## 3 10.5
## 4 17.9
## 5 14.6
## 6 8.34
## 7 11.1
## 8 16.9
## 9 12.6
## 10 13.9
## 11 14.7
## 12 17.9
## 13 12.5
## 14 19.1
## 15 12.0
df_cbb.sample |> group_by(group) |>
summarize(across(everything(), median))
## # A tibble: 15 × 27
## group dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 0 0 0 3.28 6.28 0.522
## 2 2 0.447 0.457 0.916 2.22 3.71 0.640
## 3 3 0.358 0.381 0.926 1.34 1.82 0.735
## 4 4 1.05 1.13 0.924 3.3 4.92 0.67
## 5 5 0.298 0.344 0.938 1.44 2.27 0.667
## 6 6 0.721 0.783 0.954 1.34 1.78 0.741
## 7 7 0.302 0.32 0.902 1.08 1.52 0.692
## 8 8 0.108 0.108 0.966 1.04 1.70 0.594
## 9 9 0.136 0.167 0.833 0.766 1.35 0.617
## 10 10 0.541 0.641 0.9 2.37 3.33 0.657
## 11 11 0.85 0.9 0.938 2.59 3.69 0.699
## 12 12 1.94 2.08 0.951 4.82 6.64 0.762
## 13 13 0.766 0.812 0.93 2.12 2.89 0.722
## 14 14 0.345 0.405 0.91 2.66 4.33 0.628
## 15 15 2.24 2.31 0.958 3.63 4.56 0.776
## # ℹ 20 more variables: rim_asted <dbl>, other2pt_made <dbl>,
## # other2pt_attempts <dbl>, other2pt_pct <dbl>, other2pt_asted <dbl>,
## # fg2_pct <dbl>, fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_asted <dbl>,
## # fg3_pct_per_g <dbl>, games <dbl>, ft_per_g <dbl>, fta_per_g <dbl>,
## # ast_per_g_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, stl_per_g <dbl>,
## # blk_per_g <dbl>, tov_per_g <dbl>, pts_per_g_college <dbl>
df_career_stats |> filter(player == "Jaylen Brown") |>
select(player, pick_overall, year, pts_per_g, trb_per_g, ast_per_g, vorp, g)
## # A tibble: 1 × 8
## player pick_overall year pts_per_g trb_per_g ast_per_g vorp g
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Jaylen Brown 3 2016 18.6 5.3 2.4 9.7 540
df_career_stats |> filter(year >= 2010)|>
group_by(pick_overall) |>
summarize(across(everything(), function(x) mean(x))) |>
select(pick_overall, pts_per_g, trb_per_g, ast_per_g, vorp, g)
## Warning: There were 122 warnings in `summarize()`.
## The first warning was:
## ℹ In argument: `across(everything(), function(x) mean(x))`.
## ℹ In group 1: `pick_overall = 1`.
## Caused by warning in `mean.default()`:
## ! argument is not numeric or logical: returning NA
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 121 remaining warnings.
## # A tibble: 61 × 6
## pick_overall pts_per_g trb_per_g ast_per_g vorp g
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 18.9 6.65 4.28 14.2 380.
## 2 2 14.7 5.14 3.19 3.91 338.
## 3 3 17.4 6.58 3.31 12.0 420.
## 4 4 12 5.32 1.91 3 372.
## 5 5 12.5 4.61 3.36 4.84 377.
## 6 6 9.92 4.5 2.17 6.57 346.
## 7 7 12.4 5.08 2.51 3.77 430.
## 8 8 9.21 3.31 1.91 1.71 390.
## 9 9 10.3 4.76 2.31 5.61 418.
## 10 10 9.74 3.46 2.13 5.14 370.
## # ℹ 51 more rows
df_cluster2 <- df_cbb_scaled
df_cluster2$name <- rownames(df_cbb)
df_cluster2 <- df_cluster2 |> relocate(name, .before = dunk_made)
df_cluster2 <- column_to_rownames(df_cluster2, var = "name")
k1 <- 15
k2 <- 10
k3 <- 5
k15 <- kmeans(df_cluster2, centers = k1, nstart = 50)
k10 <- kmeans(df_cluster2, centers = k2, nstart = 50)
k5 <- kmeans(df_cluster2, centers = k3, nstart = 50)
temp_assign <- k15$cluster
combine <- function(df, kmeans) {
temp_assign <- kmeans$cluster
df_temp <- as.tibble(data.frame(value = temp_assign, name = names(temp_assign)))
if(!("all_cluster" %in% colnames(df_temp))) {
df_temp <- df_temp |> rename(all_cluster = value)
}
cluster_df <- left_join(df, df_temp, by = c("name"))
cluster_df <- cluster_df |> relocate(pc_cluster, .after = all_cluster)
# cluster_df <- cluster_df |> group_by(all_cluster, pc_cluster) |> summarize(n = n())
# return(cluster_df)
return(cluster_df)
}
groups15 <- combine(cluster_df15, k15) |> arrange(pc_cluster, all_cluster)
groups10 <- combine(cluster_df10, k10)
groups5 <- combine(cluster_df5, k5)
groups15
## # A tibble: 165 × 3
## name all_cluster pc_cluster
## <chr> <int> <int>
## 1 Trae Young 6 1
## 2 Elfrid Payton 1 2
## 3 Anthony Black 1 2
## 4 Jaylen Brown 1 2
## 5 Marcus Smart 1 2
## 6 De'Aaron Fox 2 2
## 7 Shai Gilgeous-Alexander 2 2
## 8 Jalen Suggs 10 2
## 9 Brandon Miller 14 2
## 10 Malik Monk 14 2
## # ℹ 155 more rows
df_cbb <- df_cbb |> rownames_to_column(var = "name")
df_groups <- left_join(df_cbb, groups15, by = c("name"))
df_cbb <- df_cbb |> column_to_rownames(var = "name")
library(ggforce)
df_groups <- df_groups |>
mutate(bust = if_else(name %in% bust_list, 1, 0),
good = if_else(name %in% good_list, 1, 0))
df_groups |> group_by(all_cluster) |>
summarize(n = n(),
across(everything(), mean),
ratio = good / bust) |>
select(-name) |>
print(width = Inf)
## Warning: There were 15 warnings in `summarize()`.
## The first warning was:
## ℹ In argument: `across(everything(), mean)`.
## ℹ In group 1: `all_cluster = 1`.
## Caused by warning in `mean.default()`:
## ! argument is not numeric or logical: returning NA
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 14 remaining warnings.
## # A tibble: 15 × 32
## all_cluster n dunk_made dunk_attempts dunk_pct rim_made rim_attempts
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 15 0.433 0.481 0.902 1.98 3.01
## 2 2 9 0.543 0.589 0.896 3.14 5.06
## 3 3 3 2.12 2.17 0.979 3.57 4.34
## 4 4 22 0.684 0.743 0.920 1.88 2.70
## 5 5 2 0 0 0 1.42 2.14
## 6 6 1 0 0 0 3.28 6.28
## 7 7 4 2.30 2.44 0.942 5.86 7.67
## 8 8 22 0.263 0.294 0.887 1.10 1.78
## 9 9 14 1.28 1.39 0.925 3.13 4.45
## 10 10 7 0.411 0.445 0.934 1.88 2.72
## 11 11 10 1.96 2.09 0.941 3.73 5.00
## 12 12 12 0.0783 0.0854 0.932 1.11 1.83
## 13 13 14 0.376 0.430 0.896 1.23 1.72
## 14 14 19 0.466 0.512 0.907 1.98 3.13
## 15 15 11 0.780 0.823 0.947 1.78 2.40
## rim_pct rim_asted other2pt_made other2pt_attempts other2pt_pct other2pt_asted
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.663 0.314 0.574 1.92 0.290 0.176
## 2 0.618 0.228 1.62 4.33 0.366 0.0983
## 3 0.825 0.556 0.964 2.55 0.391 0.281
## 4 0.700 0.548 0.884 2.14 0.414 0.474
## 5 0.664 0.062 0.788 1.82 0.438 0.024
## 6 0.522 0.114 1.19 2.78 0.427 0.026
## 7 0.763 0.529 1.58 3.84 0.428 0.402
## 8 0.621 0.407 0.585 1.63 0.357 0.272
## 9 0.707 0.497 1.65 4.31 0.382 0.409
## 10 0.698 0.300 0.731 1.71 0.449 0.104
## 11 0.745 0.621 1.06 2.90 0.366 0.456
## 12 0.613 0.214 0.907 2.25 0.402 0.181
## 13 0.719 0.538 0.733 1.69 0.438 0.313
## 14 0.637 0.372 1.65 4.32 0.382 0.220
## 15 0.750 0.681 0.685 1.80 0.357 0.606
## fg2_pct fg3_per_g fg3a_per_g fg3_asted fg3_pct_per_g games ft_per_g fta_per_g
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.514 0.947 2.89 0.724 0.313 61.6 3.55 5.07
## 2 0.508 1.41 4 0.582 0.349 36.7 4.62 6.17
## 3 0.664 0.633 1.83 0.879 0.296 34 2.83 4.07
## 4 0.576 0.955 2.43 0.893 0.434 59.3 3.09 4.09
## 5 0.526 1.15 2.65 0.572 0.423 42 3.65 4.35
## 6 0.493 3.7 10.3 0.263 0.359 32 7.4 8.6
## 7 0.648 0.425 1.28 0.905 0.252 33.5 4.47 6.75
## 8 0.496 1.83 4.94 0.780 0.369 65.6 2.28 2.87
## 9 0.550 0.629 1.8 0.940 0.308 38.9 3.47 5.12
## 10 0.584 1.53 4.03 0.717 0.370 41.9 1.86 2.57
## 11 0.610 0 0.02 0 0 36.7 2.9 4.79
## 12 0.488 1.98 5.3 0.602 0.371 83.5 3.82 4.78
## 13 0.570 0.857 2.33 0.924 0.356 82.9 2.04 2.77
## 14 0.489 1.82 4.98 0.777 0.359 40.2 3.83 4.91
## 15 0.589 0.0182 0.182 0.432 0.0341 64.6 1.97 3.15
## ast_per_g_college orb_per_g drb_per_g stl_per_g blk_per_g tov_per_g
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 3.27 1.51 4.41 1.6 0.753 2.68
## 2 5.32 1.02 4.12 1.46 0.522 3.28
## 3 1.23 2.67 7.6 1 4.03 1.47
## 4 1.31 2.06 5.18 0.764 1.31 1.69
## 5 6.15 0.35 2.5 1.3 0.3 2.55
## 6 8.7 0.4 3.5 1.7 0.3 5.2
## 7 2.5 3.5 7.32 1.38 1.35 2.52
## 8 1.91 0.982 3.33 1.07 0.409 1.66
## 9 1.84 2.78 5.43 1.04 1.54 2.16
## 10 4.57 0.914 3.54 1.8 0.571 2.26
## 11 1.04 3.02 4.88 1 2.27 1.9
## 12 3.62 0.7 3.3 1.35 0.35 2.48
## 13 1.32 1.39 3.24 0.871 0.643 1.34
## 14 2.2 1.45 4.28 1.06 0.6 2.18
## 15 0.927 2.28 4.5 0.564 1.95 1.4
## pts_per_g_college pc_cluster bust good ratio
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 13.9 6.4 0 0.333 Inf
## 2 18.8 10.2 0 0.444 Inf
## 3 13.7 15 0 0.667 Inf
## 4 13.9 10.8 0.136 0.409 3
## 5 12.4 11.5 0.5 0.5 1
## 6 27.4 1 0 1 Inf
## 7 20.7 12 0 0.5 Inf
## 8 12.9 7.05 0.182 0.273 1.5
## 9 15.4 9.14 0.143 0.357 2.5
## 10 12.6 6.86 0 0.429 Inf
## 11 13.1 13.7 0.1 0.5 5
## 12 17.4 8.08 0.0833 0.5 6
## 13 10.7 5.5 0 0.286 Inf
## 14 17.1 4.05 0.158 0.316 2
## 15 9.08 8.09 0 0.455 Inf
df_pc <- left_join(df_cluster |> rownames_to_column("name"), groups15, by = "name")
df_pc <- left_join(df_pc, df_groups |> select(name, bust, good), by = "name")
df_pc <- df_pc |> column_to_rownames("name")
df_pc <- df_pc |>
mutate(
pc_cluster = as.factor(pc_cluster),
all_cluster = as.factor(all_cluster)
)
hulls <- df_pc |>
group_by(all_cluster) |>
slice(chull(PC1, PC2))
p <- ggplot(data = df_pc, aes(x = PC1, y = PC2, color = all_cluster)) +
geom_polygon(data = hulls,
aes(group = all_cluster, color = all_cluster, fill = all_cluster),
alpha = 0.2) +
geom_point() +
geom_point(data = df_pc |> filter(bust == 1), size = 3, color = "black", shape = 10, show.legend = FALSE) +
geom_point(data = df_pc |> filter(good == 1), size = 3, color = "black", shape = 5, show.legend = FALSE) +
scale_x_reverse() +
labs(title = "Clusters using higher dimensional data",
x = "Dimension 1",
y = "Dimension 2",
color = "Cluster",
fill = "Cluster"
)
p
p2 <- p +
geom_label_repel(data = df_pc |> rownames_to_column(var = "name") |> filter(bust == 1 | good == 1),
aes(label = name),
size = 1.6,
max.overlaps = 20,
fill = NA,
label.size = NA,
segment.size = 0.2
)
p2
df_groups |> group_by(pc_cluster) |>
summarize(n = n(),
across(everything(), mean),
ratio = good / bust) |>
select(-name) |>
print(width = Inf)
## Warning: There were 15 warnings in `summarize()`.
## The first warning was:
## ℹ In argument: `across(everything(), mean)`.
## ℹ In group 1: `pc_cluster = 1`.
## Caused by warning in `mean.default()`:
## ! argument is not numeric or logical: returning NA
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 14 remaining warnings.
## # A tibble: 15 × 32
## pc_cluster n dunk_made dunk_attempts dunk_pct rim_made rim_attempts
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 1 0 0 0 3.28 6.28
## 2 2 18 0.468 0.514 0.912 2.30 3.65
## 3 3 8 0.380 0.431 0.906 1.34 1.86
## 4 4 7 1.17 1.28 0.915 3.34 5.06
## 5 5 17 0.329 0.354 0.931 1.40 2.12
## 6 6 8 0.784 0.833 0.943 1.49 2.04
## 7 7 15 0.271 0.301 0.914 1.04 1.55
## 8 8 14 0.108 0.125 0.899 1.18 1.98
## 9 9 11 0.124 0.147 0.785 0.814 1.35
## 10 10 13 0.632 0.708 0.897 2.16 3.19
## 11 11 13 1.03 1.12 0.931 2.59 3.77
## 12 12 9 2.02 2.14 0.943 4.93 6.48
## 13 13 13 0.754 0.815 0.921 2.11 2.90
## 14 14 8 0.387 0.428 0.770 2.70 4.35
## 15 15 10 2.06 2.16 0.959 3.51 4.49
## rim_pct rim_asted other2pt_made other2pt_attempts other2pt_pct other2pt_asted
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.522 0.114 1.19 2.78 0.427 0.026
## 2 0.633 0.309 1.31 3.56 0.362 0.161
## 3 0.727 0.589 0.717 1.69 0.432 0.437
## 4 0.662 0.440 1.90 5.17 0.365 0.291
## 5 0.668 0.387 0.894 2.39 0.369 0.242
## 6 0.745 0.705 0.636 1.65 0.351 0.633
## 7 0.682 0.468 0.616 1.50 0.397 0.296
## 8 0.602 0.238 0.887 2.31 0.380 0.186
## 9 0.608 0.266 0.578 1.45 0.394 0.146
## 10 0.676 0.441 0.823 2.19 0.368 0.289
## 11 0.687 0.515 1.24 3.26 0.381 0.444
## 12 0.760 0.539 1.63 3.99 0.415 0.369
## 13 0.734 0.567 0.888 2.14 0.416 0.491
## 14 0.624 0.221 1.52 4.03 0.37 0.083
## 15 0.787 0.634 0.905 2.59 0.357 0.469
## fg2_pct fg3_per_g fg3a_per_g fg3_asted fg3_pct_per_g games ft_per_g fta_per_g
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.493 3.7 10.3 0.263 0.359 32 7.4 8.6
## 2 0.501 1.52 4.29 0.731 0.342 41.6 4.07 5.38
## 3 0.581 0.45 1.31 0.935 0.414 95.5 2.31 3.35
## 4 0.511 1.1 3.21 0.891 0.331 36.3 4.13 5.86
## 5 0.509 1.55 4.28 0.725 0.359 61.7 2.88 3.68
## 6 0.581 0.0125 0.125 0.375 0.0312 72.1 1.62 2.7
## 7 0.546 1.4 3.69 0.870 0.379 73.3 1.75 2.27
## 8 0.476 1.95 5.3 0.614 0.367 78.4 3.69 4.69
## 9 0.487 1.86 4.86 0.744 0.378 75.2 2.31 2.91
## 10 0.560 1.28 3.44 0.882 0.353 44 3 4.08
## 11 0.555 0.654 1.78 0.865 0.332 45.5 3.44 4.92
## 12 0.622 0.233 0.733 0.624 0.149 34.1 4.01 6.38
## 13 0.599 0.546 1.54 0.797 0.346 52.8 2.73 3.7
## 14 0.509 1.75 4.56 0.479 0.379 33.1 4.84 6.25
## 15 0.630 0.19 0.58 0.364 0.0888 32.3 2.62 4.11
## ast_per_g_college orb_per_g drb_per_g stl_per_g blk_per_g tov_per_g
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 8.7 0.4 3.5 1.7 0.3 5.2
## 2 2.94 1.33 4.29 1.38 0.633 2.47
## 3 1.32 1.79 4.08 0.812 0.712 1.39
## 4 2.2 2.43 5.14 1.03 0.786 2.44
## 5 2.25 1.19 4.08 1.26 0.482 2.04
## 6 0.8 2.28 4.39 0.538 2.09 1.26
## 7 1.76 1.06 2.99 1.06 0.493 1.35
## 8 3.81 0.693 3.34 1.41 0.364 2.7
## 9 3.18 0.818 3.04 1.07 0.382 1.79
## 10 2.49 1.55 4.55 1.1 0.838 2.08
## 11 1.48 2.68 5.21 1.02 1.47 2.05
## 12 1.9 3.49 6.14 1.13 1.84 2.3
## 13 1.17 2.14 4.99 0.677 1.63 1.54
## 14 5.4 0.95 4 1.52 0.55 3.32
## 15 1.05 2.73 5.84 0.98 2.97 1.75
## pts_per_g_college all_cluster bust good ratio
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 27.4 6 0 1 Inf
## 2 16.6 9.56 0.0556 0.444 8
## 3 10.5 10.8 0.125 0.25 2
## 4 17.9 9.43 0 0.143 Inf
## 5 14.6 7.41 0.235 0.235 1
## 6 8.34 15 0 0.375 Inf
## 7 11.1 10.5 0 0.333 Inf
## 8 16.9 11.1 0.0714 0.429 6
## 9 12.6 7.64 0.182 0.182 1
## 10 13.9 5.46 0.154 0.462 3
## 11 14.7 7.38 0.231 0.231 1
## 12 17.9 8.78 0.111 0.556 5
## 13 12.5 5.69 0 0.538 Inf
## 14 19.1 3.88 0 0.5 Inf
## 15 12.0 9 0 0.7 Inf
hulls <- df_pc |>
group_by(pc_cluster) |>
slice(chull(PC1, PC2))
p <- ggplot(data = df_pc, aes(x = PC1, y = PC2, color = pc_cluster)) +
geom_polygon(data = hulls,
aes(group = pc_cluster, color = pc_cluster, fill = pc_cluster),
alpha = 0.2) +
geom_point() +
geom_point(data = df_pc |> filter(bust == 1), size = 3, color = "black", shape = 10, show.legend = FALSE) +
geom_point(data = df_pc |> filter(good == 1), size = 3, color = "black", shape = 5, show.legend = FALSE) +
scale_y_reverse() +
labs(title = "Clusters with Busts and Successes",
x = "Dimension 1",
y = "Dimension 2",
color = "Cluster",
fill = "Cluster"
)
p
p2 <- p +
geom_label_repel(data = df_pc |> rownames_to_column(var = "name") |> filter(bust == 1 | good == 1),
aes(label = name),
size = 1.6,
max.overlaps = 20,
fill = NA,
label.size = NA,
segment.size = 0.2
)
p2